# A toy example: defining data
# Six counts; the matrix() calls below read them row-wise as a 2 x 3 table
# (presumably two groups by three genotype classes).
gc <- c(900, 100, 3, 800, 190, 10)
# Collapse each row of three counts to two: twice an outer class plus the
# middle class (presumably allele counts derived from genotype counts).
ac <- c(
  2 * gc[1] + gc[2], gc[2] + 2 * gc[3],
  2 * gc[4] + gc[5], gc[5] + 2 * gc[6]
)
# Two ways of pooling the middle class into a 2 x 2 table: with the first
# class (gc1) or with the last class (gc2).
gc1 <- c(gc[1] + gc[2], gc[3], gc[4] + gc[5], gc[6])
gc2 <- c(gc[1], gc[2] + gc[3], gc[4], gc[5] + gc[6])

# A toy example: testing
# Pearson chi-squared tests without continuity correction. The argument is
# spelled `correct` in full instead of relying on partial matching of `corr`.
pvg <- chisq.test(matrix(gc, ncol = 3, byrow = TRUE),
                  correct = FALSE)$p.value
pva <- chisq.test(matrix(ac, ncol = 2, byrow = TRUE),
                  correct = FALSE)$p.value
pvg1 <- chisq.test(matrix(gc1, ncol = 2, byrow = TRUE),
                   correct = FALSE)$p.value
pvg2 <- chisq.test(matrix(gc2, ncol = 2, byrow = TRUE),
                   correct = FALSE)$p.value
# Smaller of the two collapsed-table p-values (no multiplicity adjustment).
pvb <- min(pvg1, pvg2)
print(c(pvg, pva, pvb))
# Same four tables with Fisher's exact test.
pvg.f <- fisher.test(matrix(gc, ncol = 3, byrow = TRUE))$p.value
pva.f <- fisher.test(matrix(ac, ncol = 2, byrow = TRUE))$p.value
pvg1.f <- fisher.test(matrix(gc1, ncol = 2, byrow = TRUE))$p.value
pvg2.f <- fisher.test(matrix(gc2, ncol = 2, byrow = TRUE))$p.value
pvb.f <- min(pvg1.f, pvg2.f)
print(c(pvg.f, pva.f, pvb.f))
# Trend in proportions: first-row counts out of the column totals, with
# scores 0, 0.5, 1 for the three classes.
pvcat <- prop.trend.test(gc[1:3], gc[1:3] + gc[4:6],
                         score = c(0, 0.5, 1))$p.value
print(pvcat)
# Double every count and repeat all of the tests above on the larger sample.
gc <- gc * 2
ac <- c(
  2 * gc[1] + gc[2], gc[2] + 2 * gc[3],
  2 * gc[4] + gc[5], gc[5] + 2 * gc[6]
)
gc1 <- c(gc[1] + gc[2], gc[3], gc[4] + gc[5], gc[6])
gc2 <- c(gc[1], gc[2] + gc[3], gc[4], gc[5] + gc[6])
pvg <- chisq.test(matrix(gc, ncol = 3, byrow = TRUE),
                  correct = FALSE)$p.value
pva <- chisq.test(matrix(ac, ncol = 2, byrow = TRUE),
                  correct = FALSE)$p.value
pvg1 <- chisq.test(matrix(gc1, ncol = 2, byrow = TRUE),
                   correct = FALSE)$p.value
pvg2 <- chisq.test(matrix(gc2, ncol = 2, byrow = TRUE),
                   correct = FALSE)$p.value
pvb <- min(pvg1, pvg2)
print(c(pvg, pva, pvb))
pvg.f <- fisher.test(matrix(gc, ncol = 3, byrow = TRUE))$p.value
pva.f <- fisher.test(matrix(ac, ncol = 2, byrow = TRUE))$p.value
pvg1.f <- fisher.test(matrix(gc1, ncol = 2, byrow = TRUE))$p.value
pvg2.f <- fisher.test(matrix(gc2, ncol = 2, byrow = TRUE))$p.value
pvb.f <- min(pvg1.f, pvg2.f)
print(c(pvg.f, pva.f, pvb.f))
pvcat <- prop.trend.test(gc[1:3], gc[1:3] + gc[4:6],
                         score = c(0, 0.5, 1))$p.value
print(pvcat)

# A toy example: estimation
# Reset to the original six counts (two rows of three when read row-wise).
gc <- c(900, 100, 3, 800, 190, 10)
# For each row: 2 * first + middle, and middle + 2 * last. rbind() followed by
# as.vector() interleaves the two results so the order is row 1 pair, then
# row 2 pair.
ac <- as.vector(rbind(
  2 * gc[c(1, 4)] + gc[c(2, 5)],
  gc[c(2, 5)] + 2 * gc[c(3, 6)]
))
# Wald confidence interval for the odds ratio of a 2 x 2 table.
#
# Args:
#   counts: numeric vector of exactly four strictly positive, finite cell
#           counts c(n1, n2, n3, n4); the odds ratio is n1 * n4 / (n2 * n3).
#   alpha:  single number in (0, 1); the interval uses the normal quantile
#           qnorm(1 - alpha / 2). Defaults to 0.05.
#
# Returns:
#   Unnamed numeric vector c(lower, odds ratio, upper).
#
# Errors:
#   Stops with a message if `counts` or `alpha` violate the constraints above
#   (previously such inputs silently produced NA, NaN or Inf).
ci.or <- function(counts, alpha = 0.05) {
  if (!is.numeric(counts) || length(counts) != 4) {
    stop("`counts` must be a numeric vector of length 4", call. = FALSE)
  }
  if (!all(is.finite(counts)) || any(counts <= 0)) {
    stop("`counts` must be finite and strictly positive", call. = FALSE)
  }
  if (!is.numeric(alpha) || length(alpha) != 1 || is.na(alpha) ||
      alpha <= 0 || alpha >= 1) {
    stop("`alpha` must be a single number in (0, 1)", call. = FALSE)
  }
  # Work in double precision: products of large integer counts would
  # otherwise overflow to NA.
  counts <- as.numeric(counts)
  f <- qnorm(1 - alpha / 2)
  or <- counts[1] * counts[4] / (counts[2] * counts[3])
  # Standard error of log(or): square root of the summed reciprocal counts.
  sq <- sqrt(sum(1 / counts))
  # The interval is symmetric on the log scale, then mapped back with exp().
  lower <- exp(log(or) - f * sq)
  upper <- exp(log(or) + f * sq)
  c(lower, or, upper)
}
# Odds-ratio intervals for the current counts at alpha = 0.05 and 0.01.
print(ci.or(counts = ac, alpha = 0.05))
print(ci.or(counts = ac, alpha = 0.01))
# Double every count, rebuild the collapsed counts, and print the intervals
# again for the larger sample.
gc <- 2 * gc
ac <- as.vector(rbind(
  2 * gc[c(1, 4)] + gc[c(2, 5)],
  gc[c(2, 5)] + 2 * gc[c(3, 6)]
))
print(ci.or(counts = ac, alpha = 0.05))
print(ci.or(counts = ac, alpha = 0.01))

# Installing R-package SNPassoc (install R version 2.9.2 first)
# NOTE(review): hard-coded, machine-specific working directory. The relative
# paths used in this script ('SNPassoc_1.4-9.zip' below, "ans1.tex" later) are
# resolved against it, so adjust this path before running elsewhere.
setwd('D:/courses/Bioinformatics/GWAinpractice')
# Install two packages from the configured repository; presumably these are
# dependencies of SNPassoc -- confirm.
install.packages(c('haplo.stats','mvtnorm'))
# Install SNPassoc from a local archive file (repos=NULL means no repository
# lookup). The .zip name suggests a Windows binary build -- confirm.
install.packages('SNPassoc_1.4-9.zip',repos=NULL)
# NOTE(review): both install.packages() calls run unconditionally on every
# execution of the script.
library(SNPassoc)

# Data manipulation: loading data
# Load the SNPs example data and show the first rows of it and of
# SNPs.info.pos (presumably made available by the same data() call).
data(SNPs)
SNPs[1:2, 1:9]
SNPs.info.pos[1:3, ]

# Data manipulation: class snp
# Tabulate the second column of SNPs, then build an snp object from the
# snp10001 column and show its first seven entries.
table(SNPs[, 2])
mySNP <- snp(SNPs$snp10001, sep = "")
mySNP[1:7]

# Descriptive analysis: class snp
# Summary followed by two plots of the same object: the default plot type,
# then one drawn with the pie function and one colour per slice.
summary(mySNP)
plot(mySNP, label = "snp10001", col = "darkgreen")
plot(
  mySNP,
  type = pie,
  label = "snp10001",
  col = c("darkgreen", "yellow", "red")
)

# Data manipulation: class snp
# Re-reference the snp object with ref = "minor" and show seven entries.
reorder(mySNP, ref = "minor")[1:7]
# Build an snp object from genotype labels that are not allele pairs by
# naming the three genotypes explicitly.
gg <- c("het", "hom1", "hom1", "hom2", "hom1", "hom1", "het", "het")
snp(gg, name.genotypes = c("hom1", "het", "hom2"))
# Treat columns 6 to 40 of SNPs as SNP columns.
myData <- setupSNP(data = SNPs, colSNPs = 6:40, sep = "")
myData[1:2, 1:8]

# Data manipulation: class setupSNP
# Same columns, this time with sort = TRUE and the position table supplied
# through info.
myData.o <- setupSNP(
  SNPs,
  colSNPs = 6:40,
  sort = TRUE,
  info = SNPs.info.pos,
  sep = ""
)
myData.o[1:2, 1:8]

# Descriptive analysis: class setupSNP
# First three labels, an overall summary, and a plot selected with
# which = 20.
labels(myData)[1:3]
summary(myData)
plot(myData, which = 20)

# Descriptive analysis: missing data
plotMissing(myData)

# Descriptive analysis: Hardy-Weinberg equilibrium
# Table for the whole data set first, then stratified by the sex column.
res <- tableHWE(myData)
res
res <- tableHWE(myData, strata = myData$sex)
res

# GWA analysis: loading data
# Load the HapMap example data and show the first rows of it and of
# HapMap.SNPs.pos (presumably made available by the same data() call).
data(HapMap)
HapMap[1:2, 1:5]
HapMap.SNPs.pos[1:3, ]

# GWA analysis: class WGassociation
# Columns 3 to 9307 are treated as SNPs, sorted using the position table.
myDat.HapMap <- setupSNP(
  HapMap,
  colSNPs = 3:9307,
  sort = TRUE,
  info = HapMap.SNPs.pos,
  sep = ""
)
myDat.HapMap[1:2, 1:5]
# Association of `group` with every SNP under the "log-add" model, followed
# by plots (whole = FALSE, then whole = TRUE) and summaries of the result.
resHapMap <- WGassociation(
  group,
  data = myDat.HapMap,
  model = "log-add"
)
plot(resHapMap, whole = FALSE, print.label.SNPs = FALSE)
print(resHapMap[1:5, ])
summary(resHapMap)
plot(resHapMap, whole = TRUE, print.label.SNPs = FALSE)
# Same analysis through scanWGassociation().
resHapMap.scan <- scanWGassociation(
  group,
  data = myDat.HapMap,
  model = "log-add"
)
print(resHapMap.scan[1:5, ])
summary(resHapMap.scan)

# Performing variety of analyses: significant SNPs
getSignificantSNPs(resHapMap, chromosome = 5)

# Performing variety of analyses: binary trait
# First on the raw SNPs data with an inline snp() conversion, then on the
# setupSNP object, with all default models and with model = c("cod", "log").
association(casco ~ snp(snp10001, sep = ""), data = SNPs)
myData <- setupSNP(data = SNPs, colSNPs = 6:40, sep = "")
association(casco ~ snp10001, data = myData)
association(casco ~ snp10001, data = myData, model = c("cod", "log"))

# Performing variety of analyses: adjustment
# sex and blood.pre enter the formula next to the SNP term.
association(casco ~ sex + snp10001 + blood.pre, data = myData)

# Performing variety of analyses: stratification
association(
  casco ~ snp10001 + blood.pre + strata(sex),
  data = myData,
  model = "dom"
)

# Performing variety of analyses: subsetting
# Restrict the analysis to rows where sex equals "Male".
association(
  casco ~ snp10001 + blood.pre,
  data = myData,
  subset = sex == "Male"
)

# Performing variety of analyses: continuous trait
association(log(protein) ~ snp100029 + blood.pre, data = myData)

# Medium scale analysis
# Take the columns reported for chromosome 5 at threshold sig = 5e-8 and
# re-analyse only those SNPs.
sigSNPs <- getSignificantSNPs(
  resHapMap,
  chromosome = 5,
  sig = 5e-8
)$column
myDat2 <- setupSNP(HapMap, colSNPs = sigSNPs, sep = "")
resHapMap2 <- WGassociation(group ~ 1, data = myDat2)
plot(resHapMap2, cex = 0.8)
# Back to the SNPs data: rebuild both setupSNP objects and analyse protein.
myData <- setupSNP(SNPs, colSNPs = 6:40, sep = "")
myData.o <- setupSNP(
  SNPs,
  colSNPs = 6:40,
  sort = TRUE,
  info = SNPs.info.pos,
  sep = ""
)
ans <- WGassociation(protein ~ 1, data = myData.o)
# Hmisc supplies latex(), used below to write the p-value table to ans1.tex
# in the current working directory.
install.packages("Hmisc")
library(Hmisc)
SNP <- pvalues(ans)
# NOTE(review): where = "'h" looks like a typo for a float placement such as
# "!h" -- confirm. The caption says "case-control" although the trait
# analysed above is protein -- confirm.
out <- latex(
  SNP,
  file = "ans1.tex",
  where = "'h",
  caption = "Summary of case-control study for SNPs data set.",
  center = "centering",
  longtable = TRUE,
  na.blank = TRUE,
  size = "scriptsize",
  collabel.just = c("c"),
  lines.page = 50,
  rownamesTexCmd = "bfseries"
)
WGstats(ans, dig = 5)
plot(ans)

# Haplotypes analysis using haplo.stats
# Build a genotype object from three chosen SNPs and fit a Gaussian
# haplo.glm of log(protein) on it, using haplo.freq.min = 0.05 in the
# control settings.
datSNP <- setupSNP(SNPs, 6:40, sep = "")
tag.SNPs <- c("snp100019", "snp10001", "snp100029")
geno <- make.geno(datSNP, tag.SNPs)
mod <- haplo.glm(
  log(protein) ~ geno,
  data = SNPs,
  family = gaussian,
  locus.label = tag.SNPs,
  allele.lev = attributes(geno)$unique.alleles,
  control = haplo.glm.control(haplo.freq.min = 0.05)
)
mod
intervals(mod)
